if (!require("pacman"))
install.packages("pacman")
pacman::p_load(tidyverse,
janitor,
colorspace,
broom,
fs,
scales,
ggthemes,
ggrepel,
patchwork,
ggimage,
jpeg,
glue,
grid,
forcats)
# set theme for ggplot2
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))
# set width of code output
options(width = 65)
# set figure parameters for knitr
knitr::opts_chunk$set(
fig.width = 7, # 7" width
fig.asp = 0.618, # the golden ratio
fig.retina = 3, # dpi multiplier for displaying HTML output on retina
fig.align = "center", # center align figures
dpi = 300 # higher dpi, sharper image
)HW 03
Initial Setup
1 - Du Bois challenge.
# 1. Base data
du_bois_income <- read_csv("data/income.csv")
# 2. Pivot + cleanup with reversed category order
du_bois_long <- du_bois_income |>
pivot_longer(cols = Rent:Other, names_to = "Category", values_to = "Expenditure") |>
mutate(
Category = fct_relevel(Category, "Rent", "Food", "Clothes", "Tax", "Other"), # For legend order
Class = fct_rev(fct_inorder(Class))
) |>
arrange(Class) |>
group_by(Class) |>
mutate(
CenterPos = cumsum(Expenditure) - 0.5 * Expenditure, # Recalculate positions
Label = paste0(Expenditure, "%"),
LabelColor = ifelse(Category == "Rent", "white", "black")
) |>
ungroup()
# 2.1 Stacked column reorderd with below fix
du_bois_long <- du_bois_long |>
mutate(Category = fct_rev(Category))
# 3. Colors (match reference)
colors <- c("Rent" = "black", "Food" = "purple", "Clothes" = "sienna1", "Tax" = "slategray1", "Other" = "snow2")
# 4. Background image
bg_path <- "images/du-bois-bg.jpg"
bg <- jpeg::readJPEG(bg_path)
bg_grob <- rasterGrob(bg, width = unit(1, "npc"), height = unit(1, "npc"), interpolate = TRUE)
# 5. Final Plot
ggplot(du_bois_long, aes(x = Class, y = Expenditure, fill = Category)) +
annotation_custom(bg_grob, xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf) +
geom_col(width = 0.5, color = "white") +
geom_text(aes(y = CenterPos, label = Label, color = LabelColor),
size = 3, show.legend = FALSE, family = "mono") +
scale_color_identity() +
scale_fill_manual(values = colors, breaks = c("Rent", "Food", "Clothes", "Tax", "Other")) +
scale_x_discrete(labels = c("1,000 $1,125 \nAND OVER ", "$750-1000 $880 ", "$500-750 $547 ", "$400-500 $433.82", "$300-400 $335.66", "$200-300 $249.45", "$100-200 $139.10")) +
coord_flip(clip = "off") +
labs(
x = NULL, y = NULL,
title = "INCOME AND EXPENDITURE OF 150 NEGRO FAMILIES IN ATLANTA, GA., U.S.A."
) +
theme_minimal(base_family = "mono") +
theme(
legend.position = "top",
plot.title = element_text(face = "bold", size = 11, hjust = 0.5),
panel.grid = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_text(size = 7),
text = element_text(size = 9),
legend.title = element_blank(),
legend.text = element_text(size = 9),
legend.key.size = unit(0.3, "cm")
) +
annotate(
geom = "text",
x = "$100-200",
y = -0,
label = "CLASS ACTUAL AVERAGE \n\n\n",
size = 2,
color = "black",
family = "mono",
hjust = 0
)2 - COVID survey - interpret
Interpretation of the COVID-19 Vaccine Attitudes Visualization
This visualization is packed with information about how medical and nursing students across the U.S. feel about the COVID-19 vaccine. It’s a grid of facets where each column represents a different statement about the vaccine, rated on a Likert scale (1 = Strongly Agree to 5 = Strongly Disagree), and each row breaks down the responses by demographic factors like age, gender, race, profession, and vaccination status. The points show the mean score for each group, and the error bars stretch from the 10th to 90th percentiles, giving a sense of how much opinions vary. The first row, labeled “All,” shows the overall responses without splitting by any demographic factors.
Overall Observations
Starting with the “All” row, it’s cool to see that most students lean toward positive views about the vaccine. For statements like “I believe the vaccine is safe,” “Getting the vaccine will make me feel safer at work,” and “I will recommend the vaccine to others,” the mean scores hover around 1.5 to 2, which means they’re generally agreeing—either strongly or somewhat. That feels right to me since these are future healthcare pros who probably trust science. But then, for “I am concerned about the safety and side effects of the vaccine,” the mean jumps to about 3 (neutral), and the error bars go from like 1.5 to 4.5. So, even though they trust the vaccine, a lot of them are still worried about side effects, which I get because it was developed so fast.
Example 1: Asian Respondents’ Mixed Feelings
One thing that caught my eye was in the “Race” category with Asian students. For “I believe the vaccine is safe,” the error bars are huge—like, they go from 1 to 5! That means some strongly agree it’s safe, while others totally disagree, which is wild variation. I didn’t expect that much difference within one group. But then, for “I will recommend the vaccine to others,” the mean is around 2, and the error bars are way tighter, maybe 1 to 3. That’s weird to me—if you’re all over the place on safety, I’d think you’d be unsure about recommending it too. Maybe they feel they should promote it, even if they’re not totally sold on it themselves, like it’s their duty or something during a pandemic.
Example 2: Nursing vs. Medical Students
Looking at the “Profession” row, I noticed nursing and medical students don’t line up as much as I thought they would. For stuff like “I trust the information I have received,” nursing students have tight error bars, maybe 1 to 2.5, so they’re pretty consistent. Medical students, though, have wider ones, like 1 to 4, showing more mixed opinions. I figured both groups would think alike since they’re both in healthcare and learning the same science. This makes me wonder if medical students are more skeptical because they’re digging into research more, while nursing students might just go with what they’re taught. It’s not what I expected!
Example 3: Vaccinated Students Still Worried
In the “Had COVID vaccine” row, students who said “Yes” to being vaccinated have means around 1 to 1.5 for positive statements like “I believe the vaccine is safe,” which makes sense—if you got it, you probably trust it. But for “I am concerned about side effects,” the mean is still around 3, with error bars from 1.5 to 4.5. That’s interesting because I thought if you’re vaccinated, you’d be less worried. It shows even people who took it aren’t totally chill about risks, which fits with how some might’ve gotten it for school or work but still have doubts about a new vaccine.
Wrapping Up
This plot shows that medical and nursing students mostly trust the COVID-19 vaccine, but they’re not all-in—side effect worries are real across the board. The differences between groups, like race or profession, add layers I didn’t expect, making it clear that even future doctors and nurses don’t all see it the same way. It’s pretty fascinating how complicated their views are!
3 - COVID survey - reconstruct
# Loading Data
covid_check <- read.csv("data/covid-survey.csv")
#head(covid)
# After watching the head of the data set we need to remove the first row which just says this - 'likert_survey' And doesn't have actual data
covid <- read.csv("data/covid-survey.csv", skip = 1)
dim(covid)[1] 1121 14
# I would have add [na = c(".", "")] during loading data but just to follow along the step doing it separatly below.
covid_filtered_na <- covid |>
mutate(across(-response_id, ~ na_if(trimws(.), ""))) |>
mutate(across(-response_id, ~ na_if(., "."))) |>
filter(if_any(-response_id, ~ !is.na(.)))
dim(covid_filtered_na)[1] 1111 14
# Updating Labels
covid_relabeled <- covid_filtered_na |>
mutate(
exp_already_vax = recode(exp_already_vax, `0` = "No", `1` = "Yes"),
exp_flu_vax = recode(exp_flu_vax, `0` = "No", `1` = "Yes"),
exp_profession = recode(exp_profession, `0` = "Medical", `1` = "Nursing"),
exp_gender = recode(exp_gender,
`0` = "Male",
`1` = "Female",
`3` = "Non-binary third gender",
`4` = "Prefer not to say"),
exp_race = recode(exp_race,
`1` = "American Indian / Alaskan Native",
`2` = "Asian",
`3` = "Black / African American",
`4` = "Native Hawaiian / Other Pacific Islander",
`5` = "White"),
exp_ethnicity = recode(exp_ethnicity,
`1` = "Hispanic / Latino",
`2` = "Non-Hispanic / Non-Latino"),
exp_age_bin = recode(exp_age_bin,
`0` = "<20",
`20` = "21–25",
`25` = "26–30",
`30` = ">30")
)
dim(covid_relabeled)[1] 1111 14
covid_survey_longer <- covid_relabeled |>
# This pivot longer combines all explanatory columns and their values into two columns. Pivoting multiple columns of data into two columns to be able to address easily while plotting.
pivot_longer(
cols = starts_with("exp_"),
names_to = "explanatory",
values_to = "explanatory_value"
) |>
filter(!is.na(explanatory_value)) |>
# This pivot longer combines all response value columns into and their values into two columns. Similar to the above one it will also help in plotting the data.
pivot_longer(
cols = starts_with("resp_"),
names_to = "response",
values_to = "response_value"
)
covid_survey_longer# A tibble: 43,428 × 5
response_id explanatory explanatory_value response
<int> <chr> <chr> <chr>
1 1 exp_profession Nursing resp_safety
2 1 exp_profession Nursing resp_confidence_…
3 1 exp_profession Nursing resp_concern_saf…
4 1 exp_profession Nursing resp_feel_safe_a…
5 1 exp_profession Nursing resp_will_recomm…
6 1 exp_profession Nursing resp_trust_info
7 1 exp_flu_vax Yes resp_safety
8 1 exp_flu_vax Yes resp_confidence_…
9 1 exp_flu_vax Yes resp_concern_saf…
10 1 exp_flu_vax Yes resp_feel_safe_a…
# ℹ 43,418 more rows
# ℹ 1 more variable: response_value <chr>
covid_survey_longer |>
distinct(explanatory_value)# A tibble: 19 × 1
explanatory_value
<chr>
1 Nursing
2 Yes
3 Male
4 Asian
5 Non-Hispanic / Non-Latino
6 26–30
7 Female
8 21–25
9 White
10 >30
11 Non-binary third gender
12 Hispanic / Latino
13 Black / African American
14 No
15 American Indian / Alaskan Native
16 Medical
17 <20
18 Prefer not to say
19 Native Hawaiian / Other Pacific Islander
covid_survey_longer <- covid_survey_longer |>
mutate(response_value = as.numeric(response_value))
covid_survey_summary_stats_by_group <- covid_survey_longer |>
group_by(explanatory, explanatory_value, response) |>
summarise(
mean = mean(response_value, na.rm = TRUE),
low = quantile(response_value, 0.1, na.rm = TRUE),
high = quantile(response_value, 0.9, na.rm = TRUE),
.groups = "drop"
)
covid_survey_summary_stats_by_group# A tibble: 126 × 6
explanatory explanatory_value response mean low high
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 exp_age_bin 21–25 resp_concern_… 3.32 2 5
2 exp_age_bin 21–25 resp_confiden… 1.31 1 2
3 exp_age_bin 21–25 resp_feel_saf… 1.20 1 2
4 exp_age_bin 21–25 resp_safety 1.95 1 5
5 exp_age_bin 21–25 resp_trust_in… 1.29 1 2
6 exp_age_bin 21–25 resp_will_rec… 1.09 1 1
7 exp_age_bin 26–30 resp_concern_… 3.35 1 5
8 exp_age_bin 26–30 resp_confiden… 1.40 1 2
9 exp_age_bin 26–30 resp_feel_saf… 1.29 1 2
10 exp_age_bin 26–30 resp_safety 2.16 1 5
# ℹ 116 more rows
covid_survey_summary_stats_all <- covid_survey_longer |>
group_by(response) |>
summarise(
mean = mean(response_value, na.rm = TRUE),
low = quantile(response_value, 0.1, na.rm = TRUE),
high = quantile(response_value, 0.9, na.rm = TRUE),
.groups = "drop"
) |>
mutate(
explanatory = "All",
explanatory_value = ""
)
covid_survey_summary_stats_all# A tibble: 6 × 6
response mean low high explanatory explanatory_value
<chr> <dbl> <dbl> <dbl> <chr> <chr>
1 resp_concern_s… 3.28 1 5 All ""
2 resp_confidenc… 1.43 1 2 All ""
3 resp_feel_safe… 1.36 1 2 All ""
4 resp_safety 2.03 1 5 All ""
5 resp_trust_info 1.40 1 2 All ""
6 resp_will_reco… 1.21 1 2 All ""
# checking distint values to see if all variable values are coming
covid_survey_summary_stats_all |>
distinct(explanatory_value)# A tibble: 1 × 1
explanatory_value
<chr>
1 ""
# Binding the rows
covid_survey_summary_stats <- bind_rows(
covid_survey_summary_stats_all,
covid_survey_summary_stats_by_group
)
covid_survey_summary_stats# A tibble: 132 × 6
response mean low high explanatory explanatory_value
<chr> <dbl> <dbl> <dbl> <chr> <chr>
1 resp_concern_… 3.28 1 5 All ""
2 resp_confiden… 1.43 1 2 All ""
3 resp_feel_saf… 1.36 1 2 All ""
4 resp_safety 2.03 1 5 All ""
5 resp_trust_in… 1.40 1 2 All ""
6 resp_will_rec… 1.21 1 2 All ""
7 resp_concern_… 3.32 2 5 exp_age_bin "21–25"
8 resp_confiden… 1.31 1 2 exp_age_bin "21–25"
9 resp_feel_saf… 1.20 1 2 exp_age_bin "21–25"
10 resp_safety 1.95 1 5 exp_age_bin "21–25"
# ℹ 122 more rows
# Relevel factors
covid_survey_summary_stats <- covid_survey_summary_stats |>
mutate(
explanatory_value = fct_relevel(explanatory_value, ">30", "26-30", "21-25", "<20"),
explanatory = fct_relevel(explanatory, "All", "exp_age_bin", "exp_gender", "exp_race", "exp_ethnicity", "exp_profession", "exp_already_vax", "exp_flu_vax"),
explanatory_value = fct_relevel(explanatory_value, "Male", "Female", "Non-binary third gender", "Prefer not to say"),
explanatory_value = fct_relevel(explanatory_value, "American Indian/Alaskan Native", "Asian", "Black/African American", "Native Hawaiian/Other Pacific Islander", "White"),
explanatory_value = fct_relevel(explanatory_value, "Non-Hispanic/Non-Latino", "Hispanic/Latino"),
explanatory_value = fct_relevel(explanatory_value, "Medical", "Nursing"),
explanatory_value = fct_relevel(explanatory_value, "No", "Yes")
)
# Set labels
covid_survey_summary_stats <- covid_survey_summary_stats |>
mutate(
explanatory = recode(explanatory, "exp_age_bin" = 'Age', "exp_gender" = 'Gender', "exp_race" = 'Race', "exp_ethnicity" = 'Ethnicity', "exp_profession" = 'Profession', "exp_already_vax" = 'Had COVID vaccine', "exp_flu_vax" = 'Had flu vaccine this year'),
response = fct_relevel(response, "resp_safety", "resp_feel_safe_at_work", "resp_concern_safety", "resp_confidence_science", "resp_trust_info", "resp_will_recommend"),
response = recode(response,
"resp_safety" = 'Based on my understanding, I believe the vaccine is safe',
"resp_confidence_science" = 'I am confident in the scientific vetting process for the new COVID vaccines',
"resp_feel_safe_at_work" = 'Getting the vaccine will make me feel safer at work',
"resp_will_recommend" = 'I will recommend the vaccine to family, friends, and community members',
"resp_trust_info" = 'I trust the information that I have received about the vaccines',
"resp_concern_safety" = 'I am concerned about the safety and side effects of the vaccine')
)
# Creating the plot
covid_survey_summary_stats |>
ggplot(aes(x = mean, y = factor(explanatory_value))) +
geom_point(size = 0.75) +
geom_errorbarh(aes(xmin = low, xmax = high), height = 0.3) +
facet_grid(
rows = vars(explanatory),
cols = vars(response),
scales = "free_y",
space = "free_y",
labeller = labeller(explanatory = label_wrap_gen(15), response = label_wrap_gen(15))
) +
scale_x_continuous(breaks = 1:5) +
labs(
x = "Mean Likert score\n(Error bars range from 10th to 90th percentile)",
y = NULL,
) +
theme(
strip.text = element_text(size = 6),
strip.text.y = element_text(angle = 0),
axis.text.y = element_text(size = 6),
axis.text.x = element_text(size = 6),
panel.spacing = unit(0, "lines"),
panel.spacing.x = unit(0.3, "lines"),
axis.title.x = element_text(size = 8),
panel.grid = element_blank(),
strip.background = element_rect(fill = "gray90", color = "black")
)4 - COVID survey - re-reconstruct
# Calculating summary statistics for identity groups
covid_survey_summary_stats_by_group_2 <- covid_survey_longer |>
group_by(explanatory, explanatory_value, response) |>
summarise(
mean = mean(response_value, na.rm = TRUE),
low = quantile(response_value, 0.25, na.rm = TRUE),
high = quantile(response_value, 0.75, na.rm = TRUE),
.groups = "drop"
)
# Calculating summary statistics for all
covid_survey_summary_stats_all_2 <- covid_survey_longer |>
group_by(response) |>
summarise(
mean = mean(response_value, na.rm = TRUE),
low = quantile(response_value, 0.25, na.rm = TRUE),
high = quantile(response_value, 0.75, na.rm = TRUE),
.groups = "drop"
) |>
mutate(
explanatory = "All",
explanatory_value = ""
)
# Binding the rows
covid_survey_summary_stats_2 <- bind_rows(
covid_survey_summary_stats_all_2,
covid_survey_summary_stats_by_group_2
)
# Relevels factors, Set Labels, and Create Plot
# Relevel factors
covid_survey_summary_stats_2 <- covid_survey_summary_stats_2 |>
mutate(
explanatory_value = fct_relevel(explanatory_value, ">30", "26-30", "21-25", "<20"),
explanatory = fct_relevel(explanatory, "All", "exp_age_bin", "exp_gender", "exp_race", "exp_ethnicity", "exp_profession", "exp_already_vax", "exp_flu_vax"),
explanatory_value = fct_relevel(explanatory_value, "Male", "Female", "Non-binary third gender", "Prefer not to say"),
explanatory_value = fct_relevel(explanatory_value, "American Indian/Alaskan Native", "Asian", "Black/African American", "Native Hawaiian/Other Pacific Islander", "White"),
explanatory_value = fct_relevel(explanatory_value, "Non-Hispanic/Non-Latino", "Hispanic/Latino"),
explanatory_value = fct_relevel(explanatory_value, "Medical", "Nursing"),
explanatory_value = fct_relevel(explanatory_value, "No", "Yes")
)
# Set labels
covid_survey_summary_stats_2 <- covid_survey_summary_stats_2 |>
mutate(
explanatory = recode(explanatory, "exp_age_bin" = 'Age', "exp_gender" = 'Gender', "exp_race" = 'Race', "exp_ethnicity" = 'Ethnicity', "exp_profession" = 'Profession', "exp_already_vax" = 'Had COVID vaccine', "exp_flu_vax" = 'Had flu vaccine this year'),
response = fct_relevel(response, "resp_safety", "resp_feel_safe_at_work", "resp_concern_safety", "resp_confidence_science", "resp_trust_info", "resp_will_recommend"),
response = recode(response,
"resp_safety" = 'Based on my understanding, I believe the vaccine is safe',
"resp_confidence_science" = 'I am confident in the scientific vetting process for the new COVID vaccines',
"resp_feel_safe_at_work" = 'Getting the vaccine will make me feel safer at work',
"resp_will_recommend" = 'I will recommend the vaccine to family, friends, and community members',
"resp_trust_info" = 'I trust the information that I have received about the vaccines',
"resp_concern_safety" = 'I am concerned about the safety and side effects of the vaccine')
)
# Creating the plot
covid_survey_summary_stats_2 |>
ggplot(aes(x = mean, y = factor(explanatory_value))) +
geom_point(size = 0.75) +
geom_errorbarh(aes(xmin = low, xmax = high), height = 0.5) +
facet_grid(
rows = vars(explanatory),
cols = vars(response),
scales = "free_y",
space = "free_y",
labeller = labeller(explanatory = label_wrap_gen(15), response = label_wrap_gen(15))
) +
scale_x_continuous(breaks = 1:5) +
labs(
x = "Mean Likert score\n(Error bars range from 25th to 75th percentile)",
y = NULL,
) +
theme(
strip.text = element_text(size = 6),
strip.text.y = element_text(angle = 0),
axis.text.y = element_text(size = 6),
axis.text.x = element_text(size = 6),
panel.spacing = unit(0, "lines"),
panel.spacing.x = unit(0.3, "lines"),
axis.title.x = element_text(size = 8),
panel.grid = element_blank(),
strip.background = element_rect(fill = "gray90", color = "black")
)
This plot differs quite a bit from the one in question 3 because the error bars now reflect the 25th to 75th percentiles (interquartile range) instead of the 10th to 90th percentiles, shrinking the range by 50 percentiles overall. This change makes the error bars much tighter around the mean Likert scores, giving a clearer picture of where most responses cluster rather than the full spread. It’s especially noticeable in columns like “I believe the vaccine is safe” and “I am concerned about side effects,” where the error bars are now more concentrated, even when broken down by factors like age or race. Comparing this to the previous plot feels like switching from a wide scatter to something closer to a box plot, showing the middle 50% of responses more clearly.
One cool thing that stands out is that non-binary third gender respondents’ error bar for “I believe the vaccine is safe” looks pretty similar to before, still hovering around a neutral mean with a tight range. This might suggest a small sample size, which could explain the lack of change. Another new observation is that for “Had COVID vaccine” (Yes vs. No), the “Yes” group shows a tight error bar around 1-2 for “I will recommend the vaccine,” indicating strong agreement, while the “No” group spreads wider toward 3-4, showing more uncertainty. I didn’t expect that big a difference—maybe those who didn’t get it are less confident in pushing it on others.
Overall, the shorter error bars suggest the data is more clustered between the 25th and 75th percentiles, which aligns with the Central Limit Theorem—larger sample sizes (likely over 30 here) tend to push distributions toward normal. This tighter focus helps me see that most medical and nursing students are pretty consistent in their views, especially on trust and recommendation, even if some worry about side effects.
5 - COVID survey - another view
# Read the CSV file
covid <- read.csv("data/covid-survey.csv", skip = 1, na = c(".", ""))
# Filter out rows with all NA in response columns
covid_filtered_na <- covid |>
mutate(across(-response_id, ~ na_if(trimws(.), ""))) |>
mutate(across(-response_id, ~ na_if(., "."))) |>
filter(if_any(-response_id, ~ !is.na(.)))
# Pivot the data longer
covid_survey_longer <- covid_filtered_na |>
pivot_longer(
cols = starts_with("resp_"),
names_to = "response",
values_to = "response_value"
) |>
mutate(
response_value = factor(response_value, levels = c("1", "2", "3", "4", "5"))
) |>
mutate(
response = recode(response,
"resp_safety" = 'Based on my understanding, I believe the vaccine is safe',
"resp_confidence_science" = 'I am confident in the scientific vetting process for the new COVID vaccines',
"resp_feel_safe_at_work" = 'Getting the vaccine will make me feel safer at work',
"resp_will_recommend" = 'I will recommend the vaccine to family, friends, and community members',
"resp_trust_info" = 'I trust the information that I have received about the vaccines',
"resp_concern_safety" = 'I am concerned about the safety and side effects of the vaccine')
) |>
mutate(
response = factor(response, levels = c(
'I am concerned about the safety and side effects of the vaccine',
'I am confident in the scientific vetting process for the new COVID vaccines',
'Based on my understanding, I believe the vaccine is safe',
'Getting the vaccine will make me feel safer at work',
'I will recommend the vaccine to family, friends, and community members',
'I trust the information that I have received about the vaccines'
))
)
# Clean and set response_value factor
covid_survey_clean <- covid_survey_longer |>
filter(!is.na(response_value)) |>
mutate(
response_value = factor(
response_value,
levels = c("1", "2", "3", "4", "5"),
labels = c("Strongly Agree", "Agree", "Neutral", "Disagree", "Strongly Disagree"),
ordered = TRUE
)
)
# Summarize data for plotting
covid_survey_summary <- covid_survey_clean |>
count(response, response_value) |>
group_by(response) |>
mutate(percent = n / sum(n) * 100)
# Define colors matching the description
colors <- c("Strongly Agree" = "pink", "Agree" = "lightblue",
"Neutral" = "green", "Disagree" = "yellow",
"Strongly Disagree" = "orange")
covid_survey_summary <- covid_survey_summary |>
mutate(response_value = fct_rev(response_value))
covid_survey_summary <- covid_survey_summary |>
mutate(
Category = fct_relevel(response_value, "Strongly Agree", "Agree", "Neutral", "Disagree", "Strongly Disagree"), # For legend order
)
head(covid_survey_summary)# A tibble: 6 × 5
# Groups: response [2]
response response_value n percent Category
<fct> <ord> <int> <dbl> <ord>
1 I am concerned about the… Strongly Agree 116 11.7 Strongl…
2 I am concerned about the… Agree 238 23.9 Agree
3 I am concerned about the… Neutral 110 11.1 Neutral
4 I am concerned about the… Disagree 312 31.4 Disagree
5 I am concerned about the… Strongly Disa… 218 21.9 Strongl…
6 I am confident in the sc… Strongly Agree 694 69.7 Strongl…
# Create the plot
ggplot(covid_survey_summary, aes(
x = percent,
y = response,
fill = response_value
)) +
geom_col(width = 0.6) +
scale_fill_manual(values = colors) +
labs(
x = "Percentage",
y = NULL,
fill = "Response",
title = "Majority of students confident about \n the safety of the COVID-19 vaccine",
subtitle = "Likert scale from strongly disagree to strongly agree",
caption = "Source: Shah et al., Johns Hopkins School of Medicine"
) +
scale_x_continuous(labels = scales::percent_format(scale = 1)) +
scale_y_discrete(labels = function(x) str_wrap(x, width = 35)) +
guides(fill = guide_legend(reverse = TRUE)) +
theme_minimal() +
theme(
legend.position = "top",
plot.title = element_text(face = "bold", size = 11, hjust = 0.5),
panel.grid = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_text(size = 7),
text = element_text(size = 9),
legend.title = element_blank(),
legend.text = element_text(size = 9),
legend.key.size = unit(0.3, "cm")
)# Note: I tried to reverse the plot bar but couldn't do it, not sure what's that thing I'm missing.
# I was able to resolve my issue with reversing the order using fct_rev on response_valueAlt Text
This stacked bar plot, titled “Majority of students confident about the safety of the COVID-19 vaccine,” illustrates student perceptions of the COVID-19 vaccine across six different statements. The responses are based on a Likert scale where 1 signifies strong agreement and 5 indicates strong disagreement.
The data reveals a high level of agreement among students for several key statements:
- Around 90% to 92% of students either strongly or somewhat agreed with the following:
- "I will recommend the vaccine to family, friends, and community members."
- "I trust the information that I have received about the vaccines."
- "I am confident in the scientific vetting process for the new COVID vaccines."
- "Getting the vaccine will make me feel safer at work."
However, agreement was slightly lower for the statement, “Based on my understanding, I believe the vaccine is safe,” coming in at approximately 75%.
Conversely, when asked about concerns regarding vaccine safety and side effects (“I am concerned about the safety and side effects of the vaccine”), only about 35% of students expressed agreement, while 55% disagreed.
This data was collected by Shah et al. from Johns Hopkins School of Medicine.